#!/bin/bash

# Launches dattn/train/train.py through the DeepSpeed launcher.
#
# Usage:
#   <this script> MODEL_NAME_OR_PATH MM_STD DS BS MM_RAND_LR MM_VIS_LR LR OUTPUT_DIR
#
# Positional arguments:
#   $1 MODEL_NAME_OR_PATH  value for --model_name_or_path
#   $2 MM_STD              value for --mm_std
#   $3 DS                  DeepSpeed config name; resolved to ./scripts/<DS>.json
#   $4 BS                  per-device train batch size (positive integer)
#   $5 MM_RAND_LR          value for --mm_rand_lr
#   $6 MM_VIS_LR           value for --mm_vis_lr
#   $7 LR                  value for --learning_rate
#   $8 OUTPUT_DIR          value for --output_dir
#
# Environment:
#   ARNOLD_WORKER_NUM      positive integer divisor used for the gradient
#                          accumulation steps (presumably the number of worker
#                          nodes - confirm with the cluster setup). Defaults
#                          to 1 when unset or empty.
set -euo pipefail

if (( $# < 8 )); then
  printf 'Usage: %s MODEL_NAME_OR_PATH MM_STD DS BS MM_RAND_LR MM_VIS_LR LR OUTPUT_DIR\n' \
    "${0##*/}" >&2
  exit 2
fi

MODEL_NAME_OR_PATH=$1
MM_STD=$2
DS=$3
BS=$4
MM_RAND_LR=$5
MM_VIS_LR=$6
LR=$7
OUTPUT_DIR=$8
WORKER_NUM=${ARNOLD_WORKER_NUM:-1}

# Both divisors must be plain positive integers; rejecting leading zeros also
# keeps bash arithmetic from interpreting the value as octal.
if [[ ! "$BS" =~ ^[1-9][0-9]*$ ]]; then
  printf 'error: BS must be a positive integer, got "%s"\n' "$BS" >&2
  exit 2
fi
if [[ ! "$WORKER_NUM" =~ ^[1-9][0-9]*$ ]]; then
  printf 'error: ARNOLD_WORKER_NUM must be a positive integer, got "%s"\n' \
    "$WORKER_NUM" >&2
  exit 2
fi

# Gradient accumulation steps: 32 / BS / WORKER_NUM using integer division, so
# BS * WORKER_NUM * GA equals 32 whenever the division is exact.
GA=$(( 32 / BS / WORKER_NUM ))
if (( GA < 1 )); then
  printf 'error: 32 / BS(%s) / ARNOLD_WORKER_NUM(%s) yields %s accumulation steps; need >= 1\n' \
    "$BS" "$WORKER_NUM" "$GA" >&2
  exit 2
fi
if (( 32 % (BS * WORKER_NUM) != 0 )); then
  printf 'warning: 32 is not divisible by BS*ARNOLD_WORKER_NUM (%s); accumulation steps truncated to %s\n' \
    "$(( BS * WORKER_NUM ))" "$GA" >&2
fi

# --per_device_train_batch_size receives BS (previously a hard-coded 4) so that
# it agrees with the value GA was derived from.
# --eval_strategy is spelled out in full; the earlier "--eval_strateg" only
# worked through argparse prefix matching.
deepspeed --master_port 34567 dattn/train/train.py \
    --deepspeed "./scripts/${DS}.json" \
    --model_name_or_path "$MODEL_NAME_OR_PATH" \
    --mm_vision_tower "google/siglip-so400m-patch14-384" \
    --mm_vision_select_layer -2 \
    --mm_image_aspect_ratio "anyres" \
    --mm_std "$MM_STD" \
    --mm_image_grid_points "[[1,2],[2,1],[2,2],[1,3],[3,1],[1,4],[4,1]]" \
    --data_path ./data/train/shrcap_filtered.json \
    --image_folder ./data/train \
    --bf16 true \
    --tf32 true \
    --output_dir "$OUTPUT_DIR" \
    --num_train_epochs 1 \
    --per_device_train_batch_size "$BS" \
    --gradient_accumulation_steps "$GA" \
    --eval_strategy "no" \
    --save_strategy "no" \
    --save_total_limit 1 \
    --train_vis true \
    --train_llm true \
    --mm_rand_lr "$MM_RAND_LR" \
    --mm_vis_lr "$MM_VIS_LR" \
    --learning_rate "$LR" \
    --weight_decay 0.1 \
    --adam_beta1 0.9 \
    --adam_beta2 0.95 \
    --adam_epsilon 1e-5 \
    --warmup_ratio 0.03 \
    --lr_scheduler_type "cosine" \
    --logging_strategy "steps" \
    --logging_steps 1 \
    --gradient_checkpointing true \
    --dataloader_num_workers 4 \
    --report_to tensorboard \
    --seed 12345
